## [1] "MMSD-P2" "MMSD-P7" "MMSD-P8" "MMSD-P11" "MMSD-P18" "Madison"
The two data sets used in this analysis are the Madison case data sourced from the Wisconsin DHS and wastewater concentration data produced by the Wisconsin State Laboratory of Hygiene. This wastewater data has entries every couple of days from 25 January 2021 to 04 January 2022.
| Date | Site | FirstConfirmed | SevenDayMACases | N1 | BCoV | N2 | PMMoV | GeoMeanN12 | FlowRate | temperature | equiv_sewage_amt |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2021-01-25 | MMSD-P11 | 8 | 10.571429 | 280459 | 8.99 | 354015 | 31434596 | 315097.91 | 9.31 | NA | 1 |
| 2021-01-25 | MMSD-P18 | 17 | 8.571429 | 310278 | 10.37 | 447153 | 29445680 | 372480.52 | 11.82 | NA | 1 |
| 2021-01-25 | MMSD-P2 | 13 | 9.571429 | 62166 | 11.38 | 94675 | 35099462 | 76717.44 | 5.74 | NA | 1 |
| 2021-01-25 | MMSD-P7 | 5 | 10.000000 | 64345 | 10.95 | 107125 | 22900169 | 83023.84 | 4.35 | NA | 1 |
| 2021-01-25 | MMSD-P8 | 4 | 12.000000 | 66705 | 11.63 | 110510 | 22716404 | 85857.85 | 6.06 | NA | 1 |
| 2021-01-28 | MMSD-P11 | 19 | 24.857143 | 1107390 | 1.98 | 1098130 | 24862163 | 1102750.28 | 9.07 | NA | 1 |
The case data has a strong weekend effect so for this section we look at a seven day smoothing of cases. The simple display of the data shows the core components of this story. First, wastewater data is noisy. And that there is a clear relationship between the two signals.
FirstImpresionFunc <- function(DF){
FirstImpressionDF <- DF%>%
NoNa(SelectedIndVar,SelectedDepVar)#Removing NA
FirstImpressionGGplot = FirstImpressionDF%>%
ggplot(aes(x=Date))+#Data depends on time
geom_point(aes(y=!!sym(SelectedDepVar), color=SelectedDepVar,info=!!sym(SelectedDepVar)),size = 1)+
geom_line(aes(y=MinMaxFixing(!!sym(SelectedIndVar),!!sym(SelectedDepVar)),
color=SelectedIndVar,
info=!!sym(SelectedIndVar)))+#compares SelectedIndVar to Cases
geom_line(aes(y=(SevenDayMACases),
color="Seven Day MA Cases",
info=SevenDayMACases))+
labs(y="Reported cases")+
facet_wrap(~Site)
return(ggplotly(FirstImpressionGGplot))
}
FirstImpresionFunc(filter(FullDF,Site=="Madison"))
FirstImpresionFunc(filter(FullDF,Site=="MMSD-P2"))
FirstImpresionFunc(filter(FullDF,Site=="MMSD-P7"))
FirstImpresionFunc(filter(FullDF,Site=="MMSD-P8"))
FirstImpresionFunc(filter(FullDF,Site=="MMSD-P11"))
FirstImpresionFunc(filter(FullDF,Site=="MMSD-P18"))
Looking at the wastewater measurements we observe there were some points many times larger than adjacent values hinting at them being outliers. We used the adjacent 10 values on each side and marked points 2.5 standard deviations away from the group mean as outliers.
OutlierRemoveFunc <- function(DF){
ErrorMarkedDF <- DF%>%#
mutate(FlagedOutliers = IdentifyOutliers(!!sym(SelectedIndVar),Bin = 21, Action = "Flag"),
#Manual flagging that method misses due to boundary effect with binning
FlagedOutliers = ifelse(Date == mdy("01/26/2021") | Date == mdy("01/27/2021"),
TRUE, FlagedOutliers),
NoOutlierVar = ifelse(FlagedOutliers, NA, !!sym(SelectedIndVar)))
return(ErrorMarkedDF)
}
MadDF <- OutlierRemoveFunc(filter(FullDF,Site=="Madison"))
P2DF <- OutlierRemoveFunc(filter(FullDF,Site=="MMSD-P2"))
P7DF <- OutlierRemoveFunc(filter(FullDF,Site=="MMSD-P7"))
P8DF <- OutlierRemoveFunc(filter(FullDF,Site=="MMSD-P8"))
P11DF <- OutlierRemoveFunc(filter(FullDF,Site=="MMSD-P11"))
P18DF <- OutlierRemoveFunc(filter(FullDF,Site=="MMSD-P18"))
OutlierPlotFunc <- function(DF){
#Split N1 into outlier and non outlier for next ggplot
OutLierPlotDF <- DF%>%
mutate(!!OutlierName := ifelse(FlagedOutliers,!!sym(SelectedIndVar),NA))%>%
mutate(!!SelectedIndVar := NoOutlierVar)
OutLierPlotObject <- OutLierPlotDF%>%
filter(!(is.na(!!sym(SelectedIndVar))&is.na(!!sym(OutlierName))))%>%
ggplot(aes(x=Date))+#Data depends on time
geom_line(aes(y=!!sym(SelectedIndVar),
color=SelectedIndVar,
info = !!sym(SelectedIndVar)))+#compares Var to Cases
geom_point(aes(y=!!sym(OutlierName),
color=OutlierName,
info = !!sym(OutlierName)))+
ColorRule+
facet_wrap(~Site)
#mentioned hand picked list other choices
ReturnPlot <- ggplotly(OutLierPlotObject,tooltip=c("info","Date"))%>%
layout(yaxis = SecondAxisFormat,
legend=list(title=list(text=''),x = 1.15, y = 0.9),
margin = list(l = 50, r = 75, b = 50, t = 50, pad = 4))
return(ReturnPlot)
}
OutlierPlotFunc(MadDF)
OutlierPlotFunc(P2DF)
OutlierPlotFunc(P7DF)
OutlierPlotFunc(P8DF)
OutlierPlotFunc(P11DF)
OutlierPlotFunc(P18DF)
PrepOutlierFunc <- function(DF){
#Drop Var create Var filter
UpdatedDF <- DF%>%
select(-sym(SelectedIndVar))%>%
rename(!!sym(SelectedIndVar) := NoOutlierVar)
return(UpdatedDF)
}
MadDF <- PrepOutlierFunc(MadDF)
P2DF <- PrepOutlierFunc(P2DF)
P7DF <- PrepOutlierFunc(P7DF)
P8DF <- PrepOutlierFunc(P8DF)
P11DF <- PrepOutlierFunc(P11DF)
P18DF <- PrepOutlierFunc(P18DF)
LoessPlotFunc <- function(DF,SpanConstant = .163){
MainDF <- DF
MainDF[[loessVar]] <- loessFit(y=(MainDF[[SelectedIndVar]]),
x=MainDF$Date, #create loess fit of the data
span=SpanConstant, #span of .2 seems to give the best result, not rigorously chosen
iterations=2)$fitted#2 iterations remove some bad patterns
MainDF <- MainDF%>%
NoNa(loessVar,"SevenDayMACases")
SLDLoessGraphic <- MainDF%>%
ggplot(aes(x=Date))+
geom_line(aes(y=!!sym(SelectedDepVar), color=SelectedDepVar , info = !!sym(SelectedDepVar)),alpha=.1)+
geom_line(aes(y=MinMaxFixing(!!sym(SelectedIndVar),!!sym(SelectedDepVar)),
color=SelectedIndVar,
info = !!sym(SelectedIndVar)),
alpha=.2)+
geom_line(aes(y=SevenDayMACases,
color="Seven Day MA Cases" ,
info = SevenDayMACases))+
geom_line(aes(y=MinMaxFixing(!!sym(loessVar),!!sym(SelectedDepVar),!!sym(SelectedIndVar)),
color=loessVar ,
info = !!sym(loessVar)))+
facet_wrap(~Site)+
labs(y="Reported cases")
ReturnPlot <- ggplotly(SLDLoessGraphic,tooltip=c("info","Date"))%>%
add_lines(x=~Date, y=MainDF[[SelectedIndVar]],
yaxis="y2", data=MainDF, showlegend=FALSE, inherit=FALSE) %>%
layout(yaxis2 = SecondAxisFormat,
legend=list(title=list(text=''),x = 1.15, y = 0.9),
margin = list(l = 50, r = 75, b = 50, t = 50, pad = 4))
return(ReturnPlot)
}
LoessPlotFunc(MadDF)
LoessPlotFunc(P2DF)
LoessPlotFunc(P7DF)
LoessPlotFunc(P8DF)
LoessPlotFunc(P11DF)
LoessPlotFunc(P18DF)